* Script setup. Positional argument 1 is the project root directory. The
* directory globals used below (log_dir, code_dir, ...) are presumably defined
* by the included configuration file - confirm.
global root_dir = "`1'"

include "$root_dir/code/config/config.do"


cap noi log using ${log_dir}/import_indep_vars.log, replace name(dat)

* Positional arguments 2-5 are copied into the globals arg1-arg4. The
* placeholder ___EMPTY___ stands for an argument that was left blank.
forvalues k = 1/4 {
    local pos = `k' + 1
    global arg`k' = cond("``pos''" == "___EMPTY___", "", "``pos''")
}

* Every non-empty argument overrides the corresponding weighting global.
if "${arg1}" != "" {
    global weight_category "${arg1}"
    display "Weight category: ${weight_category}"
}

if "${arg2}" != "" {
    global weight_versions "${arg2}"
    display "Weight versions: ${weight_versions}"
}

if "${arg3}" != "" {
    global weight_window "${arg3}"
    display "Weight window: ${weight_window}"
}

if "${arg4}" != "" {
    global wtype "${arg4}"
}
* wtype is echoed whether or not it was overridden above
display "${wtype}"
* Everything up to the matching closing brace runs inside one capture block:
* errors are still displayed (noi) but do not abort the do-file, and the
* return code of the block is inspected right after the closing brace.
capture noi {

* Run the two helper do-files quietly. The labelingvars command and the global
* countrylist1995 used further down are presumably defined there - confirm.
qui do ${code_dir}/config/labeling_indepvars_function.do
qui do ${code_dir}/config/country_list.do

   *******************************************************************************************
   ** 
   ** Import indep vars
   **
   ********************************************************************************************

* TODO: needs cleaning up;
* this is the interface between the "measures of wages" and the "automation" package;
* I'd prefer if the automation package would just import the measures of wages directly


* List of available countries for wage data
* Writes to the numbers log the country codes that have low-skilled
* manufacturing wage data (quality < 3) in at least one year up to 1995.
* The steps do not depend on any loop index and the former guard compared
* literal constants only (always true), so the listing is produced once
* instead of six identical times.

log using ${numb_dir}/import_indep_vars_numbers.log, replace name(numb)

*Low-skilled wages
use ${final_dir}/wage_combined_final_allcodes.dta, clear
* manufacturing rows (code D) that pass the quality filter
keep if code=="D" & quality < 3
keep code year country ls_wage_USD_pi ls_wage_pi_USD_05
* drop observations where both low-skilled wage measures are missing
drop if ls_wage_USD_pi==. & ls_wage_pi_USD_05==.
gen iso = upper(country)
drop if year>1995
* one row per country, then attach the country code from the lookup table
keep iso
duplicates drop
sort iso
mmerge iso using ${commondata_dir}/patstat_2018b/country_codes.dta, unmatched(master) umatch(iso) ukeep(ctry)
drop _m
disp "1995"
list ctry, noobs noheader clean

cap log close numb


********************************************************************************************
**			IMPORT DATA
********************************************************************************************


* Wages
* For each sector and each wage prefix (ls, hs, ms) build a wide file with one
* column per country and wage measure. The sector loop opened here also covers
* the VA per employee, GDP per capita and log GDP gap sections that follow.
foreach sector in MANUF TOTAL {
	* industry code that selects the sector: D for MANUF, TOT otherwise
	if "`sector'" == "MANUF" {
		local code "D"
	}
	else {
		local code "TOT"
	}
	foreach www in ls hs ms {
		use ${final_dir}/wage_combined_final_allcodes.dta, clear
		keep if code=="TOT" | code=="D"
		*restrict to respective sector
		keep if code=="`code'"
		keep year `www'_wage_USD_pi `www'_wage_pi_USD_05 `www'_wage_pi_USD_95 `www'_wage_defGDP_USD_05 `www'_wage_defGDP_USD_95 country 
		* drop observations where all five wage measures are missing. The last
		* term needs its own ==. test: a bare variable counts as true for every
		* non-zero value, so rows with a valid last measure were dropped too.
		drop if `www'_wage_USD_pi==. & `www'_wage_pi_USD_05 ==. & `www'_wage_pi_USD_95==. & `www'_wage_defGDP_USD_05==. & `www'_wage_defGDP_USD_95==.

		*we drop this deflator. it was used in some early stages, but not anymore. but since we still dropped above, we also drop now

		drop `www'_wage_defGDP_USD_05

		*make sure we know which country is which
		gen iso_code = upper(country)
		mmerge iso_code using ${commondata_dir}/patstat_2018b/country_codes.dta, unmatched(master) umatch(iso) ukeep(ctry)
		drop country _m iso_code
		* short stubs: DP, LP, MP and MG identify the four remaining wage measures
		ren `www'_wage_USD_pi `www'wDP_
		ren `www'_wage_pi_USD_05 `www'wLP_
		ren `www'_wage_pi_USD_95 `www'wMP_ 
		ren `www'_wage_defGDP_USD_95 `www'wMG_
		* one row per year, one column per stub and country code
		reshape wide `www'wDP_ `www'wLP_ `www'wMP_ `www'wMG_, i( year) j(ctry) string

		*labeling
		labelingvars "`www'"

		compress
		save ${dataset_dir}/indep_vars/`www'wages_wide_`sector', replace

	}


	
	* VA per employee
	* Wide file (one column per country and measure) of value added per
	* employee for the sector code selected at the top of the sector loop.
	use ${final_dir}/wage_combined_final_allcodes.dta, clear
	keep if code == "`code'"
	keep year country va_emp_USD_pi va_emp_pi_USD_05 va_emp_pi_USD_95 va_emp_defGDP_USD_05 va_emp_defGDP_USD_95
	* keep an observation as soon as at least one of the five measures is present
	keep if va_emp_USD_pi != . | va_emp_pi_USD_05 != . | va_emp_pi_USD_95 != . | va_emp_defGDP_USD_05 != . | va_emp_defGDP_USD_95 != .

	* this deflator was only used in early stages; it still takes part in the
	* filter above and is removed afterwards
	drop va_emp_defGDP_USD_05


	* attach the country code via the upper-cased country identifier
	generate iso_code = upper(country)
	mmerge iso_code using ${commondata_dir}/patstat_2018b/country_codes.dta, unmatched(master) umatch(iso) ukeep(ctry)
	drop country _m iso_code
	rename va_emp_USD_pi vaempDP_
	rename va_emp_pi_USD_05 vaempLP_
	rename va_emp_pi_USD_95 vaempMP_
	rename va_emp_defGDP_USD_95 vaempMG_
	reshape wide vaempDP_ vaempLP_ vaempMP_ vaempMG_, i(year) j(ctry) string
	* the year cut-off is applied after reshaping, as before
	keep if year <= 2009

	*labeling
	labelingvars "vaemp"

	compress
	save ${dataset_dir}/indep_vars/vaemployee_wide_`sector'.dta, replace

	
	* GDP per capita
	* Wide file (one column per country and measure) of GDP per capita, built
	* from the rows of the sector code selected at the top of the sector loop.
	use ${final_dir}/wage_combined_final_allcodes.dta, clear
	keep if code=="TOT" | code=="D"
	keep if code=="`code'"
	keep year cap_GDP_nom_own_USD_pi cap_GDP_nom_own_pi_USD_05 cap_GDP_nom_own_pi_USD_95 cap_GDP_real_own_95_USD_95 cap_GDP_real_own_05_USD_05 country
	* drop observations where all five measures are missing. The first test
	* used to be inverted, which dropped rows that had only the nominal series
	* and kept rows without any data; it now matches the wage and VA sections.
	drop if cap_GDP_nom_own_USD_pi == . & cap_GDP_nom_own_pi_USD_05 ==. & cap_GDP_nom_own_pi_USD_95==. & cap_GDP_real_own_95_USD_95==. & cap_GDP_real_own_05_USD_05==.

	*we drop this deflator. it was used in some early stages, but not anymore. but since we still dropped above, we also drop now
	drop cap_GDP_real_own_05_USD_05

	*make sure we know which country is which
	gen iso_code = upper(country)
	mmerge iso_code using ${commondata_dir}/patstat_2018b/country_codes.dta, unmatched(master) umatch(iso) ukeep(ctry)
	drop country _m iso_code
	ren cap_GDP_nom_own_USD_pi gdppcDP_
	ren cap_GDP_nom_own_pi_USD_05 gdppcLP_
	ren cap_GDP_nom_own_pi_USD_95 gdppcMP_
	ren cap_GDP_real_own_95_USD_95 gdppcMG_
	* one row per year, one column per stub and country code
	reshape wide gdppcDP_ gdppcLP_ gdppcMP_ gdppcMG_, i( year) j(ctry) string
	drop if year>2009

	*labeling
	labelingvars "gdppc"

	compress
	save ${dataset_dir}/indep_vars/gdp_percapita_wide_`sector', replace


	*  deviation from log GDP
	* Wide file (one lngdpgap_ column per country) of loggdp_gap for the years
	* 1970-2009, built from the rows of the current sector code.
	use ${final_dir}/wage_combined_final_allcodes.dta, clear
	keep if code == "`code'"
	keep year country loggdp_gap
	generate iso_code = upper(country)

	* drop observations without data
	* NOTE(review): GRB is not the ISO-3 code of the United Kingdom (GBR). If no
	* country carries the code GRB this line drops nothing - confirm the intent.
	drop if iso_code == "GRB" & loggdp_gap == .
	mmerge iso_code using ${commondata_dir}/patstat_2018b/country_codes.dta, unmatched(master) umatch(iso) ukeep(ctry)
	drop country _m iso_code
	rename loggdp_gap lngdpgap_
	keep if year >= 1970 & year <= 2009

	* same set of countries as elsewhere: CL, IL and NZ are excluded
	drop if inlist(ctry, "CL", "IL", "NZ")
	reshape wide lngdpgap_, i(year) j(ctry) string

	*labeling
	labelingvars "lngdpgap"

	compress
	save ${dataset_dir}/indep_vars/loggdp_gap_wide_`sector'.dta, replace
	
	
}

* manufacturing share
* Wide file (one mshare_ column per country) of code_share for the
* manufacturing rows (code D).
use ${final_dir}/wage_combined_final_allcodes.dta, clear
keep if code == "D"
keep year country code_share
keep if code_share != .
generate iso_code = upper(country)
mmerge iso_code using ${commondata_dir}/patstat_2018b/country_codes.dta, unmatched(master) umatch(iso) ukeep(ctry)
drop country _m iso_code
rename code_share mshare_
reshape wide mshare_, i(year) j(ctry) string

*labeling
* every variable except year is labelled; the country code is the part of the
* variable name that follows the 7-character stub
ds year, not
local sharevars `r(varlist)'
foreach v of local sharevars {
	local countrycode = substr("`v'", 8, .)
	local `countrycode' = "`countrycode'"
	label var `v' "Manufacturing share `countrycode'"
}


compress
save ${dataset_dir}/indep_vars/manufshare_wide_MANUF.dta, replace

* manufacturing absolute
* Wide file (one mVA_ column per country) of code_VA_pi_USD_95 for the
* manufacturing rows (code D).
use ${final_dir}/wage_combined_final_allcodes.dta, clear
keep if code == "D"
keep year country code_VA_pi_USD_95
keep if code_VA_pi_USD_95 != .
generate iso_code = upper(country)
mmerge iso_code using ${commondata_dir}/patstat_2018b/country_codes.dta, unmatched(master) umatch(iso) ukeep(ctry)
drop country _m iso_code
rename code_VA_pi_USD_95 mVA_
reshape wide mVA_, i(year) j(ctry) string

*labeling
* every variable except year is labelled; the country code is the part of the
* variable name that follows the 4-character stub
ds year, not
local vavars `r(varlist)'
foreach v of local vavars {
	local countrycode = substr("`v'", 5, .)
	local `countrycode' = "`countrycode'"
	label var `v' "VA in manufacturing `countrycode' PI_USD_95"
}


compress
save ${dataset_dir}/indep_vars/manufVA_wide_MANUF.dta, replace

* low-skill weighted manuf size
* prepare manuf sector imputation for switzerland (no industry code data)
use ${final_dir}/wage_combined_final_allcodes.dta, clear
keep if code=="TOT" | code=="D"
preserve
*reduce the set to a single observation (the US in 1995)
keep if year == 1995 & country == "usa" & code=="D"

* Reopen the numbers log in append mode. Opening it with replace at this point
* discarded the country listing written to the same file earlier in the script.
log using ${numb_dir}/import_indep_vars_numbers.log, append name(numb)

*compute the respective skills levels percentage of the total compensation
* LABLS of the first remaining observation, divided by 100 (presumably stored
* in percent - confirm). The value is missing if no observation is left.
local labls_us1995 = LABLS[1] / 100
restore

*keep only manufacturing in switzerland
keep if country == "che" & code == "D" & year >= 1995 & year <= 2009
ren code_VA_pi_USD_95 mVA

*approximate switzerlands low skill manufacturing value added using the overall value added
* times the US compensation share for low skilled work in manufacturing
gen mVAls = `labls_us1995' * mVA

keep year mVAls 
* NOTE(review): other sections compare country codes in upper case (CL, IL,
* NZ); confirm that a lower-case code is intended for Switzerland here.
gen ctry_code = "ch"
tempfile ch_imputation
save `ch_imputation', replace


* get low-skill income share of US value added in manufacturing industries in 1995 (same as above but this time as variables, not macros)
use ${final_dir}/wage_combined_final_allcodes.dta, clear
keep if year == 1995 & country == "usa" 
* keep the individual industry codes only (everything except TOT and D)
drop if code == "TOT" | code == "D"
gen labls_us1995 = LABLS / 100

keep year code labls_us1995 
* summary statistics of the US shares, written to the numbers log
tabstat labls_us1995 , s(mean sd min p10 p50 p90 max n) labelwidth(20) varwidth(20) col(stat) longstub
tempfile uslabls
save `uslabls', replace

cap log close numb

* multiply with VA and VA/emp
* Builds the low-skill weighted value added per country and year for 1995-2009
* from the individual industry codes, adds the Swiss imputation saved in the
* tempfile ch_imputation, and stores the result in wide form.
use ${final_dir}/wage_combined_final_allcodes.dta, clear
keep if year >= 1995 & year <= 2009
drop if country == "che"  

* I impute switzerland below (only sector level data)
drop if code == "TOT" | code == "D"
* attach the US 1995 share of each industry code; unmatched rows of both
* sides are kept
mmerge code using `uslabls', unmatched(both)
gen iso_code = upper(country)
* attach the country code from the lookup table
mmerge iso_code using ${commondata_dir}/patstat_2018b/country_codes.dta, unmatched(master) umatch(iso) ukeep(ctry)
drop country _m iso_code
keep year code_share ctry_code code labls_us1995 code_VA_pi_USD_95 
keep if code_VA_pi_USD_95 != .

* value added
ren code_VA_pi_USD_95 mVA
* industry value added times the US share of the same industry code
gen mVAls = labls_us1995 * mVA


* sum of mVAls per country and year, with code_share as analytic weight
collapse (sum) mVAls [aw=code_share], by(ctry_code year)
* add switzerland
append using `ch_imputation'
sort ctry_code year
* reshape into wide and save
* the long data are preserved and restored once the wide file is saved
preserve
keep mVAls year ctry
ren mVAls mVAls_
reshape wide mVAls_, i(year) j(ctry) string

*labeling
* every variable except year is labelled; the country code is the part of the
* variable name that follows the 6-character stub
ds year, not
local r: di r(varlist)
foreach var of local r {
	local countrycode = substr("`var'", 7, .)
	local `countrycode' = "`countrycode'"
	label var `var' "Low-skill weighted value added `countrycode' in manufacturing PI_USD_95"
}


compress
save ${dataset_dir}/indep_vars/mVAls_wide_MANUF.dta, replace
restore


*  interest rates
* Builds real long-term interest rates per country: the long-term rate minus
* the yearly percentage change of the VA_P_95 deflator of the TOT rows.
use ${dataset_dir}/import/long-term_intr.dta, clear
* reshape long without arguments relies on the reshape settings stored with
* the imported dataset
reshape long
tempfile lintr
save `lintr'

* apply deflator
use ${final_dir}/wage_combined_final_allcodes.dta, clear
keep if code=="TOT" | code=="D"
keep if code=="TOT"
keep country year VA_P_95
* 1994 is kept only as the base year for the 1995 inflation rate
keep if year>=1994 & year <=2009
gen iso_code = upper(country)
mmerge iso_code using ${commondata_dir}/patstat_2018b/country_codes.dta, unmatched(master) umatch(iso) ukeep(ctry)
* match on country code and year; the interest rate file names its key country
mmerge ctry year using `lintr', umatch(country year)
drop country iso_code _merge
ren ctry_code country

* change of the deflator relative to the previous row of the same country
* (assumes consecutive years within a country)
bys country (year) : gen inflation = VA_P_95[_n]/ VA_P_95[_n-1]-1 if _n > 1
drop if year == 1994

*generate real interest rates
gen lintrMP_ = L_intr-inflation*100
keep if L_intr != .
drop VA_P_95 inflation L_intr

reshape wide lintrMP_, i(year) j(country) string

* every variable except year is labelled; the country code is the part of the
* variable name that follows the 8-character stub. The label text used to be a
* leftover from the value added section and did not describe this series.
ds year, not
local r: di r(varlist)
foreach var of local r {
	local countrycode = substr("`var'", 9, .)
	local `countrycode' = "`countrycode'"
	label var `var' "Real long-term interest rate `countrycode'"
}


save ${dataset_dir}/indep_vars/interestrates_wide_MANUF.dta, replace




* GDP (just cleaning)
* Wide file (one gdp_ column per country) of GDP_nom_own_USD_defGDP taken from
* the TOT rows, i.e. all sectors rather than manufacturing only.
use ${final_dir}/wage_combined_final_allcodes.dta, clear
keep if code == "TOT"
keep year country GDP_nom_own_USD_defGDP

* remove observations without data as well as Puerto Rico
drop if GDP_nom_own_USD_defGDP == . | country == "pri"

generate iso_code = upper(country)
mmerge iso_code using ${commondata_dir}/patstat_2018b/country_codes.dta, unmatched(master) umatch(iso) ukeep(ctry)
drop country _m iso_code
rename GDP_nom_own_USD_defGDP gdp_

reshape wide gdp_, i(year) j(ctry) string
compress
save ${dataset_dir}/indep_vars/gdp_wide.dta, replace

* Total Low-skilled labor compensation (also just cleaning)
* Wide file (one totlsw_ column per country) of tot_pay_ls_USD_95 for the
* manufacturing rows (code D) with non-missing data.
use ${final_dir}/wage_combined_final_allcodes.dta, clear
keep if code == "D" & tot_pay_ls_USD_95 != .
keep year country tot_pay_ls_USD_95

generate iso_code = upper(country)
mmerge iso_code using ${commondata_dir}/patstat_2018b/country_codes.dta, unmatched(master) umatch(iso) ukeep(ctry)
drop country _m iso_code
rename tot_pay_ls_USD_95 totlsw_
reshape wide totlsw_, i(year) j(ctry) string
compress
save ${dataset_dir}/indep_vars/totlsw_wide.dta, replace






********************************************************************************************
**			VERIFY DATA CONSISTENCY WITH COUNTRYLISTS
********************************************************************************************

* For every sector and wage prefix, check that each country of the global
* countrylist1995 has a non-missing wLP wage in all years 1995-2009.
* Gaps are reported on screen; they do not stop the script.
foreach sector in MANUF TOTAL {
	foreach www in ls hs ms {
		use ${dataset_dir}/indep_vars/`www'wages_wide_`sector'.dta, clear
		keep if inrange(year, 1995, 2009)
		foreach ctry of global countrylist1995 {
			cap noi assert !missing(`www'wLP_`ctry')
			if _rc {
				di "Missing `sector' `www': `ctry'"
			}
		}
	}
}

* Check GDP per capita (LP measure) and the log GDP gap of the TOTAL files for
* every country of the global countrylist1995 over the years 1995-2009.
* Gaps are reported on screen; they do not stop the script.
use ${dataset_dir}/indep_vars/gdp_percapita_wide_TOTAL.dta, clear
keep if inrange(year, 1995, 2009)
mmerge year using ${dataset_dir}/indep_vars/loggdp_gap_wide_TOTAL.dta, unmatched(master)
foreach ctry of global countrylist1995 {
	cap noi assert !missing(gdppcLP_`ctry')
	if _rc {
		di "Missing gdppc: `ctry'"
	}
	cap noi assert !missing(lngdpgap_`ctry')
	if _rc {
		di "Missing lngdpgap: `ctry'"
	}
}

* Check that the mean GDP over the five years before 1995 is available for
* every country of the global countrylist1995.
* Gaps are reported on screen; they do not stop the script.
use ${dataset_dir}/indep_vars/gdp_wide.dta, clear
keep if year >= 1995-5 & year < 1995
drop year
collapse (mean) gdp_*
foreach ctry of global countrylist1995 {
	cap noi assert !missing(gdp_`ctry')
	if _rc {
		di "Missing GDP: `ctry'"
	}
}


}
* Report the outcome of the capture block first: _rc still holds its return
* code at this point and the log-closing commands below would overwrite it.
if _rc == 0 {
    display "Execution finished successfully."
}
else {
    display "Execution finished with errors."
}

* If the capture block stopped on an error while the numbers log was open, that
* log would stay open and block the next run in the same session. Close it
* defensively together with the main log.
cap log close numb
cap log close dat